# File:     JOP_RR1_financial_topics.R
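# Purpose:  Summarises ChatGPT-annotated pairwise comparisons of conversations
#           involving Fed chairs (politeness and aggression), cleans the
#           aggression annotations through the OpenAI API, and fits a
#           Bradley-Terry model. (The file name still refers to financial topics.)
# Input:    ./data/finalData.RData
#           ./output/chatGPT_polite_pairwise_2000.RData
#           ./output/chatGPT_polite_pairwise_1000.RData
#           ./output/chatGPT_polite_BTM_aggression.RData
# Output:   ./output/chatGPT_polite_BTM_aggression_cleaned.RData
#           ./output/figures/polite_simple_comparison.pdf
#           ./output/figures/SI_gpt_descriptive.pdf
#           ./output/figures/SI_BT_results.pdf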
# Author:   JB


# Start from an empty workspace so objects left over from earlier runs cannot
# leak into this analysis.
rm(list = ls())

# Attach dependencies with library() rather than require(): library() stops
# immediately when a package is missing, whereas require() only warns and
# returns FALSE, which defers the failure to a harder-to-diagnose point later.
library(tidyverse)
library(ggridges)

# Every relative path below is resolved against the replication folder.
# setwd('C:/Users/Jimbo/Dropbox/FED/FED/Paper/JOP/RR1_replication/')
setwd('D:/Dropbox/FED/FED/Paper/JOP/RR1_replication/')

# Load the objects stored in finalData.RData into the global environment.
load('./data/finalData.RData')

# Build `toplot`: for every value of `speaker2`, the share of pairwise
# comparisons in which the answer stored in `morePolite` was conversation 2,
# computed separately for the two annotation files.
#
# The original repeated the same pipeline verbatim for each file; it now lives
# in one helper so the two inputs cannot drift apart.

# Summarise one annotation file.
#   path  - .RData file that contains an object named `res`
#   label - value written to the `type` column (used as the plot legend)
# Returns a tibble with one row per `speaker2`, the counts per answer, `share`
# and `type`.
polite_share_by_speaker <- function(path, label) {
  # Load into a private environment so the helper does not depend on, or
  # overwrite, whatever `res` currently exists in the workspace.
  loaded <- new.env()
  load(path, envir = loaded)

  loaded$res %>%
    as_tibble() %>%
    # Strip the boilerplate around the answer so that only the conversation
    # number remains.
    mutate(morePolite = gsub('\\.|Conversation |The most polite interaction is Conversation | is the most polite','',morePolite)) %>%
    count(morePolite, speaker2) %>%
    # Answers that still contain a colon were not reduced to a bare number.
    filter(!grepl(':', morePolite)) %>%
    # fill = 0L: a speaker/answer combination that never occurs is a count of
    # zero; without it the missing cell is NA and `share` becomes NA as well.
    spread(morePolite, n, fill = 0L) %>%
    mutate(share = `2` / (`1` + `2`)) %>%
    mutate(type = label)
}

toplot <- bind_rows(
  polite_share_by_speaker('./output/chatGPT_polite_pairwise_2000.RData',
                          'Longer conversations'),
  polite_share_by_speaker('./output/chatGPT_polite_pairwise_1000.RData',
                          'Shorter conversations')
)

# Write the dodged bar chart of `share` by `speaker2` and conversation type to
# a PDF. The dashed vertical line marks a share of 0.5.
pdf('./output/figures/polite_simple_comparison.pdf',width = 7,height = 5)
# The plot is wrapped in print(): a ggplot object is only drawn when it is
# printed, and top-level auto-printing does not happen when the script is run
# through source(), which would leave the PDF without a page.
print(
  toplot %>%
    ggplot(aes(x = share,y = speaker2,fill = type)) +
    geom_bar(stat = 'identity',position = 'dodge') +
    geom_vline(xintercept = .5,linetype = 'dashed') +
    labs(x = '% of matches where randomly chosen conversation involving Yellen\nis less polite than randomly chosen conversation involving male Fed chair',
         y = 'Male Fed Chairs',
         fill = 'Conversation Type',
         title = 'AI-Annotated Results',
         subtitle = 'Comparing politeness of randomly chosen conversations via Chat-GPT')
)
dev.off()


# Second part of the script: aggression annotations and Bradley-Terry model.
# library() (not require()) so a missing package stops the run immediately.
library(BradleyTerry2)
rm(list = ls())
load('./output/chatGPT_polite_BTM_aggression.RData')
library(openai)

# Selects whose OpenAI account is used: 'JC' or anything else for the
# alternative account.
API <- 'JC'

# SECURITY: the OpenAI secret keys used to be written out in this file. Keys
# that have been committed to a script must be treated as leaked and revoked.
# The key is now read from the environment (e.g. set in ~/.Renviron):
#   OPENAI_API_KEY_JC / OPENAI_API_KEY_JB  - per-account keys, or
#   OPENAI_API_KEY                         - generic fallback.
key_var <- if (API == 'JC') 'OPENAI_API_KEY_JC' else 'OPENAI_API_KEY_JB'
api_key <- Sys.getenv(key_var)
if (!nzchar(api_key)) {
  api_key <- Sys.getenv('OPENAI_API_KEY')
}

# A key is only needed when some rows still await annotation; do not block the
# purely analytical part of the script when everything is already cleaned.
needs_api <- !('cleaned' %in% names(res)) || anyNA(res[['cleaned']])
if (!nzchar(api_key) && needs_api) {
  stop('No OpenAI API key found: set ', key_var,
       ' or OPENAI_API_KEY in the environment.', call. = FALSE)
}
if (nzchar(api_key)) {
  Sys.setenv(OPENAI_API_KEY = api_key)
}
rm(api_key, key_var, needs_api)

# Report which account is active.
if(API == 'JC') {
  cat('Josh\n')
} else {
  cat('Jim\n')
}


# Assemble the chat messages for one extraction request.
#   chunk - text of a model-written assessment; it is appended, wrapped in
#           double quotes, to the fixed instruction.
# Returns a list of two messages (system, then user), each a list with the
# elements "role" and "content".
create_prompt <- function(chunk) {
  system_message <- list(
    "role" = "system",
    "content" = "You are a helpful AI assistant."
  )

  # The instruction text is reproduced verbatim, including its line breaks and
  # indentation, because it is part of what is sent to the model.
  user_text <- stringr::str_c(
        'The following is a summary assessment of which of two conversations contains more aggressive language. 
        Please extract just the number of the conversation that is deemed to be more aggressive.
        If there is not enough information provide, return a -1. Do not return any text in your response.\n"',
        chunk,'"')

  user_message <- list(
    "role" = "user",
    "content" = user_text
  )

  list(system_message, user_message)
}

# Send one chat-completion request to the "gpt-3.5-turbo" model.
#   prompt      - list of chat messages, passed on as `messages`
#   temperature - passed on unchanged (default 0)
#   n           - passed on unchanged (default 1)
# Pauses for one second after the call and returns whatever
# openai::create_chat_completion() returned.
submit_openai <- function(prompt, temperature = 0, n = 1) {
  completion <- openai::create_chat_completion(
    model = "gpt-3.5-turbo",
    messages = prompt,
    temperature = temperature,
    n = n
  )
  # Fixed pause after every request.
  Sys.sleep(1)
  return(completion)
}

# Ask the model to reduce each free-text `explanation` to a bare conversation
# number and store the answer in `res$cleaned`. Only rows whose `cleaned` value
# is still NA are processed, so an interrupted run can simply be restarted.
cleanRes <- list()

# First run: create the column so that every row is picked up below.
if (!'cleaned' %in% names(res)) {
  res$cleaned <- NA_character_
}

# Upper bound on API calls per row. The original retried forever, which never
# terminates when the failure is permanent (e.g. an invalid key).
max_attempts <- 10

for(i in which(is.na(res$cleaned))) {
  Sys.sleep(2)
  cat('----------------------------\n',i,'\n----------------------------\n')
  prompts <- create_prompt(res %>% slice(i) %>% pull(explanation))

  attempts <- 1
  openai_completions <- try(submit_openai(prompt = prompts,temperature = 0,n = 1))
  # inherits() instead of class(x) == 'try-error': the comparison form breaks
  # as a loop condition as soon as the result carries more than one class.
  while(inherits(openai_completions,'try-error') && attempts < max_attempts) {
    Sys.sleep(5)
    openai_completions <- try(submit_openai(prompt = prompts,temperature = 0,n = 1))
    attempts <- attempts + 1
  }

  if(inherits(openai_completions,'try-error')) {
    # Leave the row as NA so that a later run retries it.
    warning('Row ',i,': giving up after ',max_attempts,
            ' failed API calls; `cleaned` left as NA.',call. = FALSE)
    next
  }

  answer <- openai_completions$choices$message.content
  if(length(answer) != 1) {
    # Exactly one answer is expected because n = 1 was requested.
    warning('Row ',i,': expected one answer, got ',length(answer),
            '; `cleaned` left as NA.',call. = FALSE)
    next
  }

  cleanRes[[i]] <- answer
  res$cleaned[i] <- answer
}

save(res,file = './output/chatGPT_polite_BTM_aggression_cleaned.RData')



# Reload the cleaned annotations written by the step above.
load('./output/chatGPT_polite_BTM_aggression_cleaned.RData')

# Reduce `cleaned` to a number: the first digit found in the answer (with any
# leading minus signs); an answer of 0 is recoded to -1.
res <- res %>%
  mutate(cleaned = as.numeric(str_extract(cleaned,'(-)*\\d')),
         cleaned = ifelse(cleaned == 0,-1,cleaned))

# Count, per conversation-length bucket, how often each chair appears on the
# "more aggressive" and "less aggressive" side of a comparison. Only rows with
# direction == 'more' that are not reversed are used.
tab <- res %>%
  filter(cleaned != -1,
         direction == 'more',
         !reversed) %>%
  # TRUE when the first conversation of the pair is the more aggressive one.
  mutate(firstIsMore = (cleaned == 1 & direction == 'more') |
           (cleaned == 2 & direction == 'less'),
         moreAgg = ifelse(firstIsMore,fed_1,fed_2),
         lessAgg = ifelse(firstIsMore,fed_2,fed_1)) %>%
  # Bucket by n_1 (presumably the number of turns in conversation 1 - confirm).
  mutate(length = case_when(n_1 < 5 ~ '3-4',
                            n_1 < 7 ~ '5-6',
                            n_1 < 9 ~ '7-8',
                            n_1 >= 9 ~ '9+')) %>%
  # Alternatives tried by the author:
  # mutate(length = n_1) %>%
  # filter(fed_1 == 'Bernanke' | fed_2 == 'Bernanke') %>%
  count(moreAgg,lessAgg,length)

# Heat map: within every chair pair and length bucket, the share of
# comparisons won by the chair on the y-axis ("more aggressive") against the
# chair on the x-axis ("less aggressive").
pdf('./output/figures/SI_gpt_descriptive.pdf',width = 8,height = 4)
# print() is required for the plot to reach the PDF when the script is run
# through source(); a ggplot object is only drawn when printed.
print(
  tab %>%
    # Order-independent pair key: the two names sorted alphabetically and
    # joined by '_'. For the six cross-chair pairs this gives exactly the
    # labels the nested ifelse() used to produce. The old version sent every
    # pair it did not list - including a chair compared with itself - to
    # 'Powell_Yellen', inflating that pair's denominator.
    mutate(dyadID = paste(pmin(as.character(moreAgg),as.character(lessAgg)),
                          pmax(as.character(moreAgg),as.character(lessAgg)),
                          sep = '_')) %>%
    group_by(dyadID,length) %>%
    mutate(tot = sum(n)) %>%
    ungroup() %>%
    mutate(share = n / tot) %>%
    # filter(share == max(share)) %>%
    select(-dyadID,-tot,-n) %>%
    # Reverse the x-axis order relative to the y-axis order.
    mutate(lessAgg = factor(lessAgg,levels = rev(c('Bernanke','Greenspan','Powell','Yellen'))),
           moreAgg = factor(moreAgg,levels = (c('Bernanke','Greenspan','Powell','Yellen')))) %>%
    # spread(lessAgg,share)
    ggplot(aes(x = lessAgg,y = moreAgg,fill = share)) +
    geom_tile() +
    geom_text(aes(label = paste0(round(share*100,0),'%')),size =3.5) +
    scale_fill_gradient2(midpoint = .5,low = 'darkred',mid = 'white',high = 'darkgreen') +
    theme_bw()+
    labs(x = 'Less aggressive conversations',
         y = 'More aggressive conversations',
         fill = 'Proportion of\ncomparisons',
         title = 'AI-annotated aggression',
         subtitle = 'Proportion of comparisons (y-axis to x-axis)\nthat are more aggressive by conversation length (facets)') +
    facet_grid(~length) +
    theme(legend.position = 'none',
          axis.text.x = element_text(angle = 45,hjust = 1))
)
dev.off()

require(fixest)

# Linear probability model: was conversation 2 picked (cleaned == 2)?
# Regressors: chair of conversation 2 and the log length of both
# conversations; fixed effects for both chambers, `reversed`, n_1 and the
# chair of conversation 1. Standard errors clustered on year_1.
feols(
  fml = cleaned == 2 ~ fed_2 + log(nchars_1) + log(nchars_2) |
    chamber_1 + chamber_2 + reversed + factor(n_1) + fed_1,
  data = res %>%
    filter(direction == 'more') %>%
    mutate(year_1 = lubridate::year(date_1),
           year_2 = lubridate::year(date_2)),
  cluster = 'year_1'
)

# Mirror image: was conversation 1 picked (cleaned == 1)?
# NOTE(review): this model clusters on date_1 while the one above clusters on
# year_1 - confirm the difference is intended.
feols(
  fml = cleaned == 1 ~ fed_1 + log(nchars_1) + log(nchars_2) |
    chamber_1 + chamber_2 + reversed + factor(n_1) + fed_2,
  data = res %>%
    filter(direction == 'more') %>%
    mutate(year_1 = lubridate::year(date_1),
           year_2 = lubridate::year(date_2)),
  cluster = 'date_1'
)


# Bradley-Terry preparation ----

# Quick look at the usable rows (result is printed, not stored).
res %>%
  as_tibble() %>%
  mutate(choice = cleaned) %>%
  mutate(year_1 = lubridate::year(date_1),
         year_2 = lubridate::year(date_2)) %>%
  filter(choice != -1)

# One row per combination of comparison attributes, with the number of times
# conversation 1 (`choice_1`) and conversation 2 (`choice_2`) were picked.
aggressive.sf <- res %>%
  as_tibble() %>%
  filter(cleaned != -1) %>%
  mutate(choice = cleaned,
         year_1 = lubridate::year(date_1),
         year_2 = lubridate::year(date_2)) %>%
  # Map each calendar year to a chair label using the cut-offs 2006, 2014 and
  # 2018.
  mutate(period_1 = case_when(year_1 < 2006 ~ 'Greenspan',
                              year_1 < 2014 ~ 'Bernanke',
                              year_1 < 2018 ~ 'Yellen',
                              year_1 >= 2018 ~ 'Powell')) %>%
  mutate(period_2 = case_when(year_2 < 2006 ~ 'Greenspan',
                              year_2 < 2014 ~ 'Bernanke',
                              year_2 < 2018 ~ 'Yellen',
                              year_2 >= 2018 ~ 'Powell')) %>%
  # Alternative tried by the author: bin nchars_1 / nchars_2 into four classes
  # with cut-offs 1000, 2000 and 4000.
  # mutate(nchars_1 = ifelse(nchars_1 < 1000,1,
  #                          ifelse(nchars_1 < 2000,2,
  #                                 ifelse(nchars_1 < 4000,3,4))),
  #        nchars_2 = ifelse(nchars_2 < 1000,1,
  #                          ifelse(nchars_2 < 2000,2,
  #                                 ifelse(nchars_2 < 4000,3,4)))) %>%
  # filter(fed_1 == 'Bernanke',fed_2 == 'Greenspan',direction == 'less',!reversed,chamber_1 == 'House',chamber_2 == 'House',n_1 == 3,nchars_1 == 181,year_1 == 2012)
  count(fed_1,fed_2,choice,direction,reversed,
        chamber_1,chamber_2,n_1,n_2,nchars_1,nchars_2,
        year_1,year_2,period_1,period_2) %>%
  # Wide format: one count column per answer, named choice_<value>.
  spread(choice,n,sep='_') %>%
  # filter(`choice_-1` != 1) %>%
  # select(-`choice_-1`) %>%
  # A missing count means the answer never occurred: treat it as zero.
  mutate(choice_1 = ifelse(is.na(choice_1),0,choice_1),
         choice_2 = ifelse(is.na(choice_2),0,choice_2))

# Show the first 30 rows of the aggregated comparison counts.
aggressive.sf %>%
  print(n = 30)

# Estimation sample: comparisons with direction == 'more' for which `reversed`
# is TRUE. 'Yellen' is listed first in the factor levels for both players
# (presumably so that Yellen acts as the reference category - confirm).
# NOTE(review): the descriptive tables elsewhere in this script keep the
# !reversed rows instead - confirm that using the reversed rows here is
# intended.
aggressive.sf2 <- aggressive.sf %>%
  filter(direction == 'more',reversed) %>%
  mutate(fed_1 = factor(fed_1,levels = c('Yellen','Greenspan','Bernanke','Powell')),
         fed_2 = factor(fed_2,levels = c('Yellen','Greenspan','Bernanke','Powell')))

# Replace fed_1 / fed_2 by data frames that bundle the player identity (`fed`)
# with that conversation's covariates. Both data frames use the same column
# names so the model formula below can refer to them generically. The factor
# created above must exist before this step, because it is copied into `fed`.
aggressive.sf2$fed_1 <- data.frame(fed = aggressive.sf2$fed_1,
                              chamber = aggressive.sf2$chamber_1,
                              year = aggressive.sf2$year_1,
                              nchars = aggressive.sf2$nchars_1,
                              period = aggressive.sf2$period_1)
aggressive.sf2$fed_2 <- data.frame(fed = aggressive.sf2$fed_2,
                              chamber = aggressive.sf2$chamber_2,
                              year = aggressive.sf2$year_2,
                              nchars = aggressive.sf2$nchars_2,
                              period = aggressive.sf2$period_2)


# Fit the Bradley-Terry model. Outcome: the pair of counts
# (choice_1, choice_2); players: fed_1 and fed_2, identified by the `fed`
# column (id = 'fed'). The formula uses fed, chamber, factor(year) and
# log(nchars); `period` is carried in the player data frames but not used in
# the formula. The model is assigned and summarised in a single statement.
summary(aggressiveModel <- BTm(cbind(choice_1,choice_2),fed_1,fed_2, ~fed + chamber + factor(year) + log(nchars),
                   id = 'fed',data = aggressive.sf2))


# Same summary again (repeats the output of the statement above).
summary(aggressiveModel)

# Refit with br = T and summarise (presumably the bias-reduced fit - confirm
# against the BradleyTerry2 documentation). `T` is shorthand for TRUE.
summary(update(aggressiveModel,br=T))

# Ability estimates derived from the fitted model.
BTabilities(aggressiveModel)

# library() (not require()) so a missing package stops the run immediately.
library(qvcalc)

# Quasi standard errors for the Bradley-Terry abilities.
qv <- qvcalc(BTabilities(aggressiveModel))


# Dot-and-whisker plot of the estimated abilities: thin bars span
# +/- 1.96 quasi-SE, thick bars +/- 1.65 quasi-SE, and the dashed line marks 0.
pdf('./output/figures/SI_BT_results.pdf',width = 7,height = 5)
# print() is required for the plot to reach the PDF when the script is run
# through source(); a ggplot object is only drawn when printed.
print(
  qv$qvframe %>%
    # The chair names are stored as row names; keep them as a column.
    mutate(chair = row.names(.)) %>%
    as_tibble() %>%
    ggplot(aes(x = estimate,y = reorder(chair,estimate))) +
    geom_point(size = 3) +
    geom_errorbarh(aes(xmin = estimate - 1.96*quasiSE,xmax = estimate + 1.96*quasiSE),height = 0) +
    geom_errorbarh(aes(xmin = estimate - 1.65*quasiSE,xmax = estimate + 1.65*quasiSE),height = 0,size = 1.2) +
    geom_vline(xintercept = 0,linetype = 'dashed') +
    theme_bw() +
    labs(x = 'More aggressive conversations',y = 'Fed Chair',
         title = 'Bradley-Terry measure of aggression',
         subtitle = 'Estimated aggression relative to Yellen')
)
dev.off()

# Inspect the coefficient vector of the fitted Bradley-Terry model.
aggressiveModel$coefficients

# NOTE: a bare `BTm()` call with no arguments used to follow here. It supplies
# neither players nor data, so it fails and aborts any non-interactive run of
# the script before the remaining analyses are reached; it has been removed.
# Use `?BTm` to open the help page instead.

  
# Distribution of n_1 across all comparisons.
res %>%
  count(n_1)

# Same descriptive table as before, restricted to n_1 < 8 and without the
# length buckets. All rows have direction == 'more', so an answer of 1 means
# the first conversation was judged more aggressive.
tab <- res %>%
  filter(cleaned != -1) %>%
  filter(direction == 'more',!reversed,n_1 < 8) %>%
  select(fed_1,fed_2,cleaned) %>%
  mutate(moreAgg = ifelse(cleaned == 1,fed_1,fed_2)) %>%
  mutate(lessAgg = ifelse(cleaned == 1,fed_2,fed_1)) %>%
  # filter(fed_1 == 'Bernanke' | fed_2 == 'Bernanke') %>%
  count(moreAgg,lessAgg)

# Heat map of the share of comparisons, within each chair pair, won by the
# chair on the y-axis against the chair on the x-axis.
tab %>%
  # Order-independent pair key: the two names sorted alphabetically and joined
  # by '_'. For the six cross-chair pairs this gives exactly the labels the
  # nested ifelse() used to produce. The old version sent every pair it did
  # not list - including a chair compared with itself - to 'Powell_Yellen',
  # inflating that pair's denominator.
  mutate(dyadID = paste(pmin(as.character(moreAgg),as.character(lessAgg)),
                        pmax(as.character(moreAgg),as.character(lessAgg)),
                        sep = '_')) %>%
  group_by(dyadID) %>%
  mutate(tot = sum(n)) %>%
  ungroup() %>%
  mutate(share = n / tot) %>%
  # filter(share == max(share)) %>%
  select(-dyadID,-tot,-n) %>%
  mutate(lessAgg = factor(lessAgg,levels = c('Greenspan','Bernanke','Powell','Yellen')),
         moreAgg = factor(moreAgg,levels = c('Greenspan','Bernanke','Powell','Yellen'))) %>%
  # spread(lessAgg,share)
  ggplot(aes(x = lessAgg,y = moreAgg,fill = share)) +
  geom_tile() +
  geom_text(aes(label = paste0(round(share*100,1),'%'))) +
  scale_fill_gradient2(midpoint = .5,low = 'darkred',mid = 'white',high = 'darkgreen') +
  theme_bw()+
  labs(x = 'Less aggressive conversations',
       y = 'More aggressive conversations',
       fill = 'Proportion of\ncomparisons')


# Attach the more / less aggressive chair to every usable comparison, this
# time for both question directions: the first conversation counts as more
# aggressive when it was picked under direction 'more' or when the second one
# was picked under direction 'less'.
toAnal <- res %>%
  filter(cleaned != -1) %>%
  # filter(direction == 'more',!reversed,n_1 < 9) %>%
  # select(fed_1,fed_2,cleaned) %>%
  mutate(.first_is_more = (cleaned == 1 & direction == 'more') |
           (cleaned == 2 & direction == 'less'),
         moreAgg = ifelse(.first_is_more,fed_1,fed_2),
         lessAgg = ifelse(.first_is_more,fed_2,fed_1)) %>%
  # The helper flag is only needed to build the two columns above.
  select(-.first_is_more)

# Share of comparisons involving Yellen (direction 'more', reversed rows) in
# which Yellen's conversation is the more aggressive one.
toAnal %>%
  filter(fed_1 == 'Yellen' | fed_2 == 'Yellen',
         direction == 'more',
         reversed) %>%
  summarise(mean(moreAgg == 'Yellen'))

# Linear probability model of "Yellen is the more aggressive side" on the
# identity of the less aggressive chair, excluding rows where that is Yellen.
summary(
  lm(moreAgg == 'Yellen' ~ lessAgg,
     toAnal %>%
       filter(lessAgg != 'Yellen'))
)

# Direction 'less': the picked conversation is the LESS aggressive one, so an
# answer of 2 puts the first chair on the more aggressive side.
res %>%
  filter(cleaned != -1,
         direction == 'less') %>%
  transmute(moreAgg = ifelse(cleaned == 2,fed_1,fed_2),
            lessAgg = ifelse(cleaned == 2,fed_2,fed_1)) %>%
  # filter(fed_1 == 'Bernanke' | fed_2 == 'Bernanke') %>%
  count(moreAgg,lessAgg)

# Same direction, restricted to comparisons involving Bernanke: how often each
# chair ends up on the more aggressive side. Here the test is written the
# other way round (an answer of 1 puts the second chair on the more aggressive
# side).
res %>%
  filter(cleaned != -1,
         direction == 'less',
         fed_1 == 'Bernanke' | fed_2 == 'Bernanke') %>%
  mutate(moreAgg = ifelse(cleaned == 1,fed_2,fed_1)) %>%
  count(moreAgg)


# Stacked (filled) bars: for every chair pair, the split between answers
# picking conversation 1 and conversation 2, using the non-reversed rows with
# direction 'more'.
res %>%
  filter(!reversed,direction == 'more') %>%
  rename(choice = cleaned) %>%
  filter(choice != -1) %>%
  count(fed_1,fed_2,choice,direction,reversed) %>%
  spread(choice,n,sep='_') %>%
  group_by(fed_1,fed_2,direction,reversed) %>%
  summarise(choice_1 = sum(choice_1,na.rm = TRUE),
            choice_2 = sum(choice_2,na.rm = TRUE)) %>%
  # filter(fed_1 == 'Yellen') %>%
  ungroup() %>%
  # Back to long format: one row per chair pair and answer.
  gather(choice,value,-fed_1,-fed_2,-direction,-reversed) %>%
  ggplot(aes(x = value,y = fed_2,fill = choice)) +
  geom_bar(stat = 'identity',position = 'fill') +
  facet_grid(fed_1~direction,scales = 'free') +
  # The original ended with an unfinished `geom_text(stat = )` layer that had
  # no label aesthetic and therefore could not be drawn. Label each segment
  # with its count, centred within the segment.
  geom_text(aes(label = value),position = position_fill(vjust = 0.5))

# Print the full annotation table for inspection.
res
